41f97ef5139vN42cOYHfX_Ac8WOOjA xen/arch/x86/vmx_platform.c
41c0c4128URE0dxcO15JME_MuKBPfg xen/arch/x86/vmx_vmcs.c
419cbedeQDg8IrO3izo3o5rQNlo0kQ xen/arch/x86/x86_32/asm-offsets.c
+4202391dkvdTZ8GhWXe3Gqf9EOgWXg xen/arch/x86/x86_32/domain_build.c
3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds
41bf1717Ty3hwN3E9swdu8QfnvGqww xen/arch/x86/x86_64/asm-offsets.c
+4202391dA91ZovYX9d_5zJi9yGvLoQ xen/arch/x86/x86_64/domain_build.c
40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S
41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/x86/x86_64/mm.c
42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
static int opt_noreboot = 0;
boolean_param("noreboot", opt_noreboot);
-#if !defined(CONFIG_X86_64BITMODE)
-/* No ring-3 access in initial page tables. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
-#else
-/* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#endif
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-
-#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
-#define round_pgdown(_p) ((_p)&PAGE_MASK)
-
static void default_idle(void)
{
__cli();
relinquish_list(d, &d->page_list);
}
-
-int construct_dom0(struct domain *p,
- unsigned long alloc_start,
- unsigned long alloc_end,
- char *image_start, unsigned long image_len,
- char *initrd_start, unsigned long initrd_len,
- char *cmdline)
-{
- char *dst;
- int i, rc;
- unsigned long pfn, mfn;
- unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
- unsigned long nr_pt_pages;
- unsigned long count;
- l2_pgentry_t *l2tab, *l2start;
- l1_pgentry_t *l1tab = NULL, *l1start = NULL;
- struct pfn_info *page = NULL;
- start_info_t *si;
- struct exec_domain *ed = p->exec_domain[0];
-
- /*
- * This fully describes the memory layout of the initial domain. All
- * *_start address are page-aligned, except v_start (and v_end) which are
- * superpage-aligned.
- */
- struct domain_setup_info dsi;
- unsigned long vinitrd_start;
- unsigned long vinitrd_end;
- unsigned long vphysmap_start;
- unsigned long vphysmap_end;
- unsigned long vstartinfo_start;
- unsigned long vstartinfo_end;
- unsigned long vstack_start;
- unsigned long vstack_end;
- unsigned long vpt_start;
- unsigned long vpt_end;
- unsigned long v_end;
-
- /* Machine address of next candidate page-table page. */
- unsigned long mpt_alloc;
-
- extern void physdev_init_dom0(struct domain *);
-
- /* Sanity! */
- if ( p->id != 0 )
- BUG();
- if ( test_bit(DF_CONSTRUCTED, &p->d_flags) )
- BUG();
-
- memset(&dsi, 0, sizeof(struct domain_setup_info));
-
- printk("*** LOADING DOMAIN 0 ***\n");
-
- /*
- * This is all a bit grim. We've moved the modules to the "safe" physical
- * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
- * routine we're going to copy it down into the region that's actually
- * been allocated to domain 0. This is highly likely to be overlapping, so
- * we use a forward copy.
- *
- * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
- * 4GB and lots of network/disk cards that allocate loads of buffers.
- * We'll have to revisit this if we ever support PAE (64GB).
- */
-
- rc = parseelfimage(image_start, image_len, &dsi);
- if ( rc != 0 )
- return rc;
-
- /* Set up domain options */
- if ( dsi.use_writable_pagetables )
- vm_assist(p, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
-
- if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
- {
- printk("Initial guest OS must load to a page boundary.\n");
- return -EINVAL;
- }
-
- /*
- * Why do we need this? The number of page-table frames depends on the
- * size of the bootstrap address space. But the size of the address space
- * depends on the number of page-table frames (since each one is mapped
- * read-only). We have a pair of simultaneous equations in two unknowns,
- * which we solve by exhaustive search.
- */
- vinitrd_start = round_pgup(dsi.v_kernend);
- vinitrd_end = vinitrd_start + initrd_len;
- vphysmap_start = round_pgup(vinitrd_end);
- vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
- vpt_start = round_pgup(vphysmap_end);
- for ( nr_pt_pages = 2; ; nr_pt_pages++ )
- {
- vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
- vstartinfo_start = vpt_end;
- vstartinfo_end = vstartinfo_start + PAGE_SIZE;
- vstack_start = vstartinfo_end;
- vstack_end = vstack_start + PAGE_SIZE;
- v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
- if ( (v_end - vstack_end) < (512 << 10) )
- v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
- if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
- L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
- break;
- }
-
- printk("PHYSICAL MEMORY ARRANGEMENT:\n"
- " Kernel image: %p->%p\n"
- " Initrd image: %p->%p\n"
- " Dom0 alloc.: %08lx->%08lx\n",
- image_start, image_start + image_len,
- initrd_start, initrd_start + initrd_len,
- alloc_start, alloc_end);
- printk("VIRTUAL MEMORY ARRANGEMENT:\n"
- " Loaded kernel: %08lx->%08lx\n"
- " Init. ramdisk: %08lx->%08lx\n"
- " Phys-Mach map: %08lx->%08lx\n"
- " Page tables: %08lx->%08lx\n"
- " Start info: %08lx->%08lx\n"
- " Boot stack: %08lx->%08lx\n"
- " TOTAL: %08lx->%08lx\n",
- dsi.v_kernstart, dsi.v_kernend,
- vinitrd_start, vinitrd_end,
- vphysmap_start, vphysmap_end,
- vpt_start, vpt_end,
- vstartinfo_start, vstartinfo_end,
- vstack_start, vstack_end,
- dsi.v_start, v_end);
- printk(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
-
- if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
- return -ENOMEM;
- }
-
- /*
- * Protect the lowest 1GB of memory. We use a temporary mapping there
- * from which we copy the kernel and ramdisk images.
- */
- if ( dsi.v_start < (1<<30) )
- {
- printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
- return -EINVAL;
- }
-
- /* Paranoia: scrub DOM0's memory allocation. */
- printk("Scrubbing DOM0 RAM: ");
- dst = (char *)alloc_start;
- while ( dst < (char *)alloc_end )
- {
-#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
- printk(".");
- touch_nmi_watchdog();
- if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
- {
- memset(dst, 0, SCRUB_BYTES);
- dst += SCRUB_BYTES;
- }
- else
- {
- memset(dst, 0, (char *)alloc_end - dst);
- break;
- }
- }
- printk("done.\n");
-
- /* Construct a frame-allocation list for the initial domain. */
- for ( mfn = (alloc_start>>PAGE_SHIFT);
- mfn < (alloc_end>>PAGE_SHIFT);
- mfn++ )
- {
- page = &frame_table[mfn];
- page->u.inuse.domain = p;
- page->u.inuse.type_info = 0;
- page->count_info = PGC_allocated | 1;
- list_add_tail(&page->list, &p->page_list);
- p->tot_pages++; p->max_pages++;
- }
-
- mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
-
- SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
-
- /*
- * We're basically forcing default RPLs to 1, so that our "what privilege
- * level are we returning to?" logic works.
- */
- ed->thread.failsafe_selector = FLAT_GUESTOS_CS;
- ed->thread.event_selector = FLAT_GUESTOS_CS;
- ed->thread.guestos_ss = FLAT_GUESTOS_DS;
- for ( i = 0; i < 256; i++ )
- ed->thread.traps[i].cs = FLAT_GUESTOS_CS;
-
- /* WARNING: The new domain must have its 'processor' field filled in! */
- l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
- memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
- l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
- l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(p->mm_perdomain_pt) | __PAGE_HYPERVISOR);
- ed->mm.pagetable = mk_pagetable((unsigned long)l2start);
-
- l2tab += l2_table_offset(dsi.v_start);
- mfn = alloc_start >> PAGE_SHIFT;
- for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
- {
- if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
- {
- l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
- mpt_alloc += PAGE_SIZE;
- *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
- clear_page(l1tab);
- if ( count == 0 )
- l1tab += l1_table_offset(dsi.v_start);
- }
- *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
-
- page = &frame_table[mfn];
- if ( !get_page_and_type(page, p, PGT_writable_page) )
- BUG();
-
- mfn++;
- }
-
- /* Pages that are part of page tables must be read only. */
- l2tab = l2start + l2_table_offset(vpt_start);
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
- l1tab += l1_table_offset(vpt_start);
- l2tab++;
- for ( count = 0; count < nr_pt_pages; count++ )
- {
- *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
- page = &frame_table[l1_pgentry_to_pagenr(*l1tab)];
- if ( count == 0 )
- {
- page->u.inuse.type_info &= ~PGT_type_mask;
- page->u.inuse.type_info |= PGT_l2_page_table;
-
- /*
- * No longer writable: decrement the type_count.
- * Installed as CR3: increment both the ref_count and type_count.
- * Net: just increment the ref_count.
- */
- get_page(page, p); /* an extra ref because of readable mapping */
-
- /* Get another ref to L2 page so that it can be pinned. */
- if ( !get_page_and_type(page, p, PGT_l2_page_table) )
- BUG();
- set_bit(_PGT_pinned, &page->u.inuse.type_info);
- }
- else
- {
- page->u.inuse.type_info &= ~PGT_type_mask;
- page->u.inuse.type_info |= PGT_l1_page_table;
- page->u.inuse.type_info |=
- ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
-
- /*
- * No longer writable: decrement the type_count.
- * This is an L1 page, installed in a validated L2 page:
- * increment both the ref_count and type_count.
- * Net: just increment the ref_count.
- */
- get_page(page, p); /* an extra ref because of readable mapping */
- }
- l1tab++;
- if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
- l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
- }
-
- /* Set up shared-info area. */
- update_dom_time(p);
- p->shared_info->domain_time = 0;
- /* Mask all upcalls... */
- for ( i = 0; i < MAX_VIRT_CPUS; i++ )
- p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
- p->shared_info->n_vcpu = smp_num_cpus;
-
- /* Install the new page tables. */
- __cli();
- write_ptbase(&ed->mm);
-
- /* Copy the OS image. */
- (void)loadelfimage(image_start);
-
- /* Copy the initial ramdisk. */
- if ( initrd_len != 0 )
- memcpy((void *)vinitrd_start, initrd_start, initrd_len);
-
- /* Set up start info area. */
- si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
- si->nr_pages = p->tot_pages;
- si->shared_info = virt_to_phys(p->shared_info);
- si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
- si->pt_base = vpt_start;
- si->nr_pt_frames = nr_pt_pages;
- si->mfn_list = vphysmap_start;
-
- /* Write the phys->machine and machine->phys table entries. */
- for ( pfn = 0; pfn < p->tot_pages; pfn++ )
- {
- mfn = pfn + (alloc_start>>PAGE_SHIFT);
-#ifndef NDEBUG
-#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
- if ( pfn > REVERSE_START )
- mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
-#endif
- ((unsigned long *)vphysmap_start)[pfn] = mfn;
- machine_to_phys_mapping[mfn] = pfn;
- }
-
- if ( initrd_len != 0 )
- {
- si->mod_start = vinitrd_start;
- si->mod_len = initrd_len;
- printk("Initrd len 0x%lx, start at 0x%08lx\n",
- si->mod_len, si->mod_start);
- }
-
- dst = si->cmd_line;
- if ( cmdline != NULL )
- {
- for ( i = 0; i < 255; i++ )
- {
- if ( cmdline[i] == '\0' )
- break;
- *dst++ = cmdline[i];
- }
- }
- *dst = '\0';
-
- /* Reinstate the caller's page tables. */
- write_ptbase(¤t->mm);
- __sti();
-
- /* Destroy low mappings - they were only for our convenience. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
- l2start[i] = mk_l2_pgentry(0);
- zap_low_mappings(); /* Do the same for the idle page tables. */
-
- /* DOM0 gets access to everything. */
- physdev_init_dom0(p);
-
- set_bit(DF_CONSTRUCTED, &p->d_flags);
-
- new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
-
-#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
- shadow_lock(&p->mm);
- shadow_mode_enable(p, SHM_test);
- shadow_unlock(&p->mm);
-#endif
-
- return 0;
-}
if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(pfn) )
return;
- e = page->u.inuse.domain;
+ e = page_get_owner(page);
if ( unlikely(e != d) )
{
/*
static int alloc_l2_table(struct pfn_info *page)
{
- struct domain *d = page->u.inuse.domain;
+ struct domain *d = page_get_owner(page);
unsigned long page_nr = page_to_pfn(page);
l2_pgentry_t *pl2e;
int i;
pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR);
pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(page->u.inuse.domain->mm_perdomain_pt) |
+ mk_l2_pgentry(__pa(page_get_owner(page)->mm_perdomain_pt) |
__PAGE_HYPERVISOR);
#endif
static int alloc_l1_table(struct pfn_info *page)
{
- struct domain *d = page->u.inuse.domain;
+ struct domain *d = page_get_owner(page);
unsigned long page_nr = page_to_pfn(page);
l1_pgentry_t *pl1e;
int i;
static void free_l1_table(struct pfn_info *page)
{
- struct domain *d = page->u.inuse.domain;
+ struct domain *d = page_get_owner(page);
unsigned long page_nr = page - frame_table;
l1_pgentry_t *pl1e;
int i;
void free_page_type(struct pfn_info *page, unsigned int type)
{
- struct domain *d = page->u.inuse.domain;
+ struct domain *d = page_get_owner(page);
switch ( type )
{
* See domain.c:relinquish_list().
*/
ASSERT((x & PGT_validated) ||
- test_bit(DF_DYING, &page->u.inuse.domain->d_flags));
+ test_bit(DF_DYING, &page_get_owner(page)->d_flags));
if ( unlikely((nx & PGT_count_mask) == 0) )
{
* may be unnecessary (e.g., page was GDT/LDT) but those
* circumstances should be very rare.
*/
- struct domain *d = page->u.inuse.domain;
+ struct domain *d = page_get_owner(page);
if ( unlikely(NEED_FLUSH(tlbflush_time[d->exec_domain[0]->processor],
page->tlbflush_timestamp)) )
{
if ( unlikely(!(okay = get_page_from_pagenr(pfn, FOREIGNDOM))) )
{
MEM_LOG("Page %08lx bad domain (dom=%p)",
- ptr, page->u.inuse.domain);
+ ptr, page_get_owner(page));
}
else if ( likely(test_and_clear_bit(_PGT_pinned,
&page->u.inuse.type_info)) )
* benign reference to the page (PGC_allocated). If that reference
* disappears then the deallocation routine will safely spin.
*/
- nd = page->u.inuse.domain;
+ nd = page_get_owner(page);
y = page->count_info;
do {
x = y;
if ( unlikely(e->tot_pages++ == 0) )
get_knownalive_domain(e);
list_add_tail(&page->list, &e->page_list);
- page->u.inuse.domain = e;
+ page_set_owner(page, e);
spin_unlock(&e->page_alloc_lock);
* benign reference to the page (PGC_allocated). If that reference
* disappears then the deallocation routine will safely spin.
*/
- nd = page->u.inuse.domain;
+ nd = page_get_owner(page);
y = page->count_info;
do {
x = y;
pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
page = &frame_table[pfn];
- if ( page->u.inuse.domain != d )
+ if ( page_get_owner(page) != d )
BUG();
if ( (page->u.inuse.type_info & PGT_count_mask) >
pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
page = &frame_table[pfn];
- if ( page->u.inuse.domain != d )
+ if ( page_get_owner(page) != d )
BUG();
switch ( page->u.inuse.type_info & PGT_type_mask )
unsigned long l1pfn = pt[i]>>PAGE_SHIFT;
struct pfn_info *l1page = &frame_table[l1pfn];
- if ( l1page->u.inuse.domain != d )
+ if ( page_get_owner(l1page) != d )
{
printk("L2: Skip bizarre page belonging to other "
- "dom %p\n", l1page->u.inuse.domain);
+ "dom %p\n", page_get_owner(l1page));
continue;
}
}
- if ( l1page->u.inuse.domain != d )
+ if ( page_get_owner(l1page) != d )
{
- printk("Audit %d: [%lx,%x] Skip foreign page dom=%lx "
+ printk("Audit %d: [%lx,%x] Skip foreign page dom=%p "
"pfn=%lx c=%08x t=%08x m2p=%lx\n",
d->id, pfn, i,
- (unsigned long)l1page->u.inuse.domain,
+ page_get_owner(l1page),
l1pfn,
l1page->count_info,
l1page->u.inuse.type_info,
unsigned long l1pfn = pt[i]>>PAGE_SHIFT;
struct pfn_info *l1page = &frame_table[l1pfn];
- if ( l1page->u.inuse.domain == d)
+ if ( page_get_owner(l1page) == d )
adjust(l1page, 1, 1);
}
}
unsigned long l1pfn = pt[i]>>PAGE_SHIFT;
struct pfn_info *l1page = &frame_table[l1pfn];
- if ( (l1page->u.inuse.domain != d) ||
+ if ( (page_get_owner(l1page) != d) ||
(l1pfn < 0x100) || (l1pfn > max_page) )
continue;
void unshadow_table(unsigned long gpfn, unsigned int type)
{
unsigned long spfn;
- struct domain *d = frame_table[gpfn].u.inuse.domain;
+ struct domain *d = page_get_owner(&frame_table[gpfn]);
SH_VLOG("unshadow_table type=%08x gpfn=%08lx", type, gpfn);
spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
- mk_l2_pgentry(__pa(frame_table[gpfn].u.inuse.domain->mm_perdomain_pt) |
+ mk_l2_pgentry(__pa(page_get_owner(&frame_table[gpfn])->mm_perdomain_pt) |
__PAGE_HYPERVISOR);
}
#endif
if (m->shadow_mode != SHM_full_32) {
if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) !=
- ((__pa(frame_table[gpfn].u.inuse.domain->mm.perdomain_pt) |
+ ((__pa(page_get_owner(&frame_table[gpfn])->mm.perdomain_pt) |
__PAGE_HYPERVISOR))) )
FAILPT("hypervisor per-domain map inconsistent");
}
--- /dev/null
+/******************************************************************************
+ * domain_build.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <asm/regs.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <xen/event.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+
+/* No ring-3 access in initial page tables. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+int construct_dom0(struct domain *p,
+ unsigned long alloc_start,
+ unsigned long alloc_end,
+ char *image_start, unsigned long image_len,
+ char *initrd_start, unsigned long initrd_len,
+ char *cmdline)
+{
+ char *dst;
+ int i, rc;
+ unsigned long pfn, mfn;
+ unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
+ unsigned long nr_pt_pages;
+ unsigned long count;
+ l2_pgentry_t *l2tab, *l2start;
+ l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+ struct pfn_info *page = NULL;
+ start_info_t *si;
+ struct exec_domain *ed = p->exec_domain[0];
+
+ /*
+ * This fully describes the memory layout of the initial domain. All
+ * *_start addresses are page-aligned, except v_start (and v_end) which are
+ * superpage-aligned.
+ */
+ struct domain_setup_info dsi;
+ unsigned long vinitrd_start;
+ unsigned long vinitrd_end;
+ unsigned long vphysmap_start;
+ unsigned long vphysmap_end;
+ unsigned long vstartinfo_start;
+ unsigned long vstartinfo_end;
+ unsigned long vstack_start;
+ unsigned long vstack_end;
+ unsigned long vpt_start;
+ unsigned long vpt_end;
+ unsigned long v_end;
+
+ /* Machine address of next candidate page-table page. */
+ unsigned long mpt_alloc;
+
+ extern void physdev_init_dom0(struct domain *);
+
+ /* Sanity! */
+ if ( p->id != 0 )
+ BUG();
+ if ( test_bit(DF_CONSTRUCTED, &p->d_flags) )
+ BUG();
+
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+ printk("*** LOADING DOMAIN 0 ***\n");
+
+ /*
+ * This is all a bit grim. We've moved the modules to the "safe" physical
+ * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
+ * routine we're going to copy it down into the region that's actually
+ * been allocated to domain 0. This is highly likely to be overlapping, so
+ * we use a forward copy.
+ *
+ * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
+ * 4GB and lots of network/disk cards that allocate loads of buffers.
+ * We'll have to revisit this if we ever support PAE (64GB).
+ */
+
+ rc = parseelfimage(image_start, image_len, &dsi);
+ if ( rc != 0 )
+ return rc;
+
+ /* Set up domain options */
+ if ( dsi.use_writable_pagetables )
+ vm_assist(p, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+
+ if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
+ {
+ printk("Initial guest OS must load to a page boundary.\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Why do we need this? The number of page-table frames depends on the
+ * size of the bootstrap address space. But the size of the address space
+ * depends on the number of page-table frames (since each one is mapped
+ * read-only). We have a pair of simultaneous equations in two unknowns,
+ * which we solve by exhaustive search.
+ */
+ vinitrd_start = round_pgup(dsi.v_kernend);
+ vinitrd_end = vinitrd_start + initrd_len;
+ vphysmap_start = round_pgup(vinitrd_end);
+ vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
+ vpt_start = round_pgup(vphysmap_end);
+ for ( nr_pt_pages = 2; ; nr_pt_pages++ )
+ {
+ vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
+ vstartinfo_start = vpt_end;
+ vstartinfo_end = vstartinfo_start + PAGE_SIZE;
+ vstack_start = vstartinfo_end;
+ vstack_end = vstack_start + PAGE_SIZE;
+ v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
+ if ( (v_end - vstack_end) < (512 << 10) )
+ v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
+ if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
+ L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+ break;
+ }
+
+ printk("PHYSICAL MEMORY ARRANGEMENT:\n"
+ " Kernel image: %p->%p\n"
+ " Initrd image: %p->%p\n"
+ " Dom0 alloc.: %08lx->%08lx\n",
+ image_start, image_start + image_len,
+ initrd_start, initrd_start + initrd_len,
+ alloc_start, alloc_end);
+ printk("VIRTUAL MEMORY ARRANGEMENT:\n"
+ " Loaded kernel: %08lx->%08lx\n"
+ " Init. ramdisk: %08lx->%08lx\n"
+ " Phys-Mach map: %08lx->%08lx\n"
+ " Page tables: %08lx->%08lx\n"
+ " Start info: %08lx->%08lx\n"
+ " Boot stack: %08lx->%08lx\n"
+ " TOTAL: %08lx->%08lx\n",
+ dsi.v_kernstart, dsi.v_kernend,
+ vinitrd_start, vinitrd_end,
+ vphysmap_start, vphysmap_end,
+ vpt_start, vpt_end,
+ vstartinfo_start, vstartinfo_end,
+ vstack_start, vstack_end,
+ dsi.v_start, v_end);
+ printk(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
+
+ if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+ {
+ printk("Initial guest OS requires too much space\n"
+ "(%luMB is greater than %luMB limit)\n",
+ (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+ return -ENOMEM;
+ }
+
+ /*
+ * Protect the lowest 1GB of memory. We use a temporary mapping there
+ * from which we copy the kernel and ramdisk images.
+ */
+ if ( dsi.v_start < (1<<30) )
+ {
+ printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
+ return -EINVAL;
+ }
+
+ /* Paranoia: scrub DOM0's memory allocation. */
+ printk("Scrubbing DOM0 RAM: ");
+ dst = (char *)alloc_start;
+ while ( dst < (char *)alloc_end )
+ {
+#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
+ printk(".");
+ touch_nmi_watchdog();
+ if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
+ {
+ memset(dst, 0, SCRUB_BYTES);
+ dst += SCRUB_BYTES;
+ }
+ else
+ {
+ memset(dst, 0, (char *)alloc_end - dst);
+ break;
+ }
+ }
+ printk("done.\n");
+
+ /* Construct a frame-allocation list for the initial domain. */
+ for ( mfn = (alloc_start>>PAGE_SHIFT);
+ mfn < (alloc_end>>PAGE_SHIFT);
+ mfn++ )
+ {
+ page = &frame_table[mfn];
+ page_set_owner(page, p);
+ page->u.inuse.type_info = 0;
+ page->count_info = PGC_allocated | 1;
+ list_add_tail(&page->list, &p->page_list);
+ p->tot_pages++; p->max_pages++;
+ }
+
+ mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
+
+ SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
+
+ /*
+ * We're basically forcing default RPLs to 1, so that our "what privilege
+ * level are we returning to?" logic works.
+ */
+ ed->thread.failsafe_selector = FLAT_GUESTOS_CS;
+ ed->thread.event_selector = FLAT_GUESTOS_CS;
+ ed->thread.guestos_ss = FLAT_GUESTOS_DS;
+ for ( i = 0; i < 256; i++ )
+ ed->thread.traps[i].cs = FLAT_GUESTOS_CS;
+
+ /* WARNING: The new domain must have its 'processor' field filled in! */
+ l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+ memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
+ l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
+ l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(p->mm_perdomain_pt) | __PAGE_HYPERVISOR);
+ ed->mm.pagetable = mk_pagetable((unsigned long)l2start);
+
+ l2tab += l2_table_offset(dsi.v_start);
+ mfn = alloc_start >> PAGE_SHIFT;
+ for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+ {
+ if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+ {
+ l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
+ mpt_alloc += PAGE_SIZE;
+ *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
+ clear_page(l1tab);
+ if ( count == 0 )
+ l1tab += l1_table_offset(dsi.v_start);
+ }
+ *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
+
+ page = &frame_table[mfn];
+ if ( !get_page_and_type(page, p, PGT_writable_page) )
+ BUG();
+
+ mfn++;
+ }
+
+ /* Pages that are part of page tables must be read only. */
+ l2tab = l2start + l2_table_offset(vpt_start);
+ l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
+ l1tab += l1_table_offset(vpt_start);
+ l2tab++;
+ for ( count = 0; count < nr_pt_pages; count++ )
+ {
+ *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+ page = &frame_table[l1_pgentry_to_pagenr(*l1tab)];
+ if ( count == 0 )
+ {
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l2_page_table;
+
+ /*
+ * No longer writable: decrement the type_count.
+ * Installed as CR3: increment both the ref_count and type_count.
+ * Net: just increment the ref_count.
+ */
+ get_page(page, p); /* an extra ref because of readable mapping */
+
+ /* Get another ref to L2 page so that it can be pinned. */
+ if ( !get_page_and_type(page, p, PGT_l2_page_table) )
+ BUG();
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ }
+ else
+ {
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
+
+ /*
+ * No longer writable: decrement the type_count.
+ * This is an L1 page, installed in a validated L2 page:
+ * increment both the ref_count and type_count.
+ * Net: just increment the ref_count.
+ */
+ get_page(page, p); /* an extra ref because of readable mapping */
+ }
+ l1tab++;
+ if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
+ l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
+ }
+
+ /* Set up shared-info area. */
+ update_dom_time(p);
+ p->shared_info->domain_time = 0;
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+ p->shared_info->n_vcpu = smp_num_cpus;
+
+ /* Install the new page tables. */
+ __cli();
+ write_ptbase(&ed->mm);
+
+ /* Copy the OS image. */
+ (void)loadelfimage(image_start);
+
+ /* Copy the initial ramdisk. */
+ if ( initrd_len != 0 )
+ memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+
+ /* Set up start info area. */
+ si = (start_info_t *)vstartinfo_start;
+ memset(si, 0, PAGE_SIZE);
+ si->nr_pages = p->tot_pages;
+ si->shared_info = virt_to_phys(p->shared_info);
+ si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+ si->pt_base = vpt_start;
+ si->nr_pt_frames = nr_pt_pages;
+ si->mfn_list = vphysmap_start;
+
+ /* Write the phys->machine and machine->phys table entries. */
+ for ( pfn = 0; pfn < p->tot_pages; pfn++ )
+ {
+ mfn = pfn + (alloc_start>>PAGE_SHIFT);
+#ifndef NDEBUG
+#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
+ if ( pfn > REVERSE_START )
+ mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+#endif
+ ((unsigned long *)vphysmap_start)[pfn] = mfn;
+ machine_to_phys_mapping[mfn] = pfn;
+ }
+
+ if ( initrd_len != 0 )
+ {
+ si->mod_start = vinitrd_start;
+ si->mod_len = initrd_len;
+ printk("Initrd len 0x%lx, start at 0x%08lx\n",
+ si->mod_len, si->mod_start);
+ }
+
+ dst = si->cmd_line;
+ if ( cmdline != NULL )
+ {
+ for ( i = 0; i < 255; i++ )
+ {
+ if ( cmdline[i] == '\0' )
+ break;
+ *dst++ = cmdline[i];
+ }
+ }
+ *dst = '\0';
+
+ /* Reinstate the caller's page tables. */
+ write_ptbase(¤t->mm);
+ __sti();
+
+ /* Destroy low mappings - they were only for our convenience. */
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
+ l2start[i] = mk_l2_pgentry(0);
+ zap_low_mappings(); /* Do the same for the idle page tables. */
+
+ /* DOM0 gets access to everything. */
+ physdev_init_dom0(p);
+
+ set_bit(DF_CONSTRUCTED, &p->d_flags);
+
+ new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
+
+#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
+ shadow_lock(&p->mm);
+ shadow_mode_enable(p, SHM_test);
+ shadow_unlock(&p->mm);
+#endif
+
+ return 0;
+}
* 64-bit operations on them. Also, just for sanity, we assert the size
* of the structure here.
*/
- if ( (offsetof(struct pfn_info, u.inuse.domain) !=
+ if ( (offsetof(struct pfn_info, u.inuse._domain) !=
(offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
(sizeof(struct pfn_info) != 24) )
{
printk("Weird pfn_info layout (%ld,%ld,%d)\n",
offsetof(struct pfn_info, count_info),
- offsetof(struct pfn_info, u.inuse.domain),
+ offsetof(struct pfn_info, u.inuse._domain),
sizeof(struct pfn_info));
for ( ; ; ) ;
}
idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
for ( i = 0; i < 1024; i++ )
{
- frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
+ frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
/* gdt to make sure it's only mapped read-only by non-privileged
domains. */
frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
- frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen;
+ page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
}
}
--- /dev/null
+/******************************************************************************
+ * domain_build.c
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <asm/regs.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <xen/event.h>
+#include <xen/elf.h>
+#include <xen/kernel.h>
+
+/* Allow ring-3 access in long mode as guest cannot use ring 1. */
+#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+
+#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
+#define round_pgdown(_p) ((_p)&PAGE_MASK)
+
+int construct_dom0(struct domain *p,
+ unsigned long alloc_start,
+ unsigned long alloc_end,
+ char *image_start, unsigned long image_len,
+ char *initrd_start, unsigned long initrd_len,
+ char *cmdline)
+{
+ char *dst;
+ int i, rc;
+ unsigned long pfn, mfn;
+ unsigned long nr_pages = (alloc_end - alloc_start) >> PAGE_SHIFT;
+ unsigned long nr_pt_pages;
+ unsigned long count;
+ l2_pgentry_t *l2tab, *l2start;
+ l1_pgentry_t *l1tab = NULL, *l1start = NULL;
+ struct pfn_info *page = NULL;
+ start_info_t *si;
+ struct exec_domain *ed = p->exec_domain[0];
+
+ /*
+ * This fully describes the memory layout of the initial domain. All
+     * *_start addresses are page-aligned, except v_start (and v_end) which are
+ * superpage-aligned.
+ */
+ struct domain_setup_info dsi;
+ unsigned long vinitrd_start;
+ unsigned long vinitrd_end;
+ unsigned long vphysmap_start;
+ unsigned long vphysmap_end;
+ unsigned long vstartinfo_start;
+ unsigned long vstartinfo_end;
+ unsigned long vstack_start;
+ unsigned long vstack_end;
+ unsigned long vpt_start;
+ unsigned long vpt_end;
+ unsigned long v_end;
+
+ /* Machine address of next candidate page-table page. */
+ unsigned long mpt_alloc;
+
+ extern void physdev_init_dom0(struct domain *);
+
+ /* Sanity! */
+ if ( p->id != 0 )
+ BUG();
+ if ( test_bit(DF_CONSTRUCTED, &p->d_flags) )
+ BUG();
+
+ memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+ printk("*** LOADING DOMAIN 0 ***\n");
+
+ /*
+ * This is all a bit grim. We've moved the modules to the "safe" physical
+ * memory region above MAP_DIRECTMAP_ADDRESS (48MB). Later in this
+ * routine we're going to copy it down into the region that's actually
+ * been allocated to domain 0. This is highly likely to be overlapping, so
+ * we use a forward copy.
+ *
+ * MAP_DIRECTMAP_ADDRESS should be safe. The worst case is a machine with
+ * 4GB and lots of network/disk cards that allocate loads of buffers.
+ * We'll have to revisit this if we ever support PAE (64GB).
+ */
+
+ rc = parseelfimage(image_start, image_len, &dsi);
+ if ( rc != 0 )
+ return rc;
+
+ /* Set up domain options */
+ if ( dsi.use_writable_pagetables )
+ vm_assist(p, VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
+
+ if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
+ {
+ printk("Initial guest OS must load to a page boundary.\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Why do we need this? The number of page-table frames depends on the
+ * size of the bootstrap address space. But the size of the address space
+ * depends on the number of page-table frames (since each one is mapped
+ * read-only). We have a pair of simultaneous equations in two unknowns,
+ * which we solve by exhaustive search.
+ */
+ vinitrd_start = round_pgup(dsi.v_kernend);
+ vinitrd_end = vinitrd_start + initrd_len;
+ vphysmap_start = round_pgup(vinitrd_end);
+ vphysmap_end = vphysmap_start + (nr_pages * sizeof(unsigned long));
+ vpt_start = round_pgup(vphysmap_end);
+ for ( nr_pt_pages = 2; ; nr_pt_pages++ )
+ {
+ vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
+ vstartinfo_start = vpt_end;
+ vstartinfo_end = vstartinfo_start + PAGE_SIZE;
+ vstack_start = vstartinfo_end;
+ vstack_end = vstack_start + PAGE_SIZE;
+ v_end = (vstack_end + (1<<22)-1) & ~((1<<22)-1);
+ if ( (v_end - vstack_end) < (512 << 10) )
+ v_end += 1 << 22; /* Add extra 4MB to get >= 512kB padding. */
+ if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
+ L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
+ break;
+ }
+
+ printk("PHYSICAL MEMORY ARRANGEMENT:\n"
+ " Kernel image: %p->%p\n"
+ " Initrd image: %p->%p\n"
+ " Dom0 alloc.: %08lx->%08lx\n",
+ image_start, image_start + image_len,
+ initrd_start, initrd_start + initrd_len,
+ alloc_start, alloc_end);
+ printk("VIRTUAL MEMORY ARRANGEMENT:\n"
+ " Loaded kernel: %08lx->%08lx\n"
+ " Init. ramdisk: %08lx->%08lx\n"
+ " Phys-Mach map: %08lx->%08lx\n"
+ " Page tables: %08lx->%08lx\n"
+ " Start info: %08lx->%08lx\n"
+ " Boot stack: %08lx->%08lx\n"
+ " TOTAL: %08lx->%08lx\n",
+ dsi.v_kernstart, dsi.v_kernend,
+ vinitrd_start, vinitrd_end,
+ vphysmap_start, vphysmap_end,
+ vpt_start, vpt_end,
+ vstartinfo_start, vstartinfo_end,
+ vstack_start, vstack_end,
+ dsi.v_start, v_end);
+ printk(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
+
+ if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
+ {
+ printk("Initial guest OS requires too much space\n"
+ "(%luMB is greater than %luMB limit)\n",
+ (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
+ return -ENOMEM;
+ }
+
+ /*
+ * Protect the lowest 1GB of memory. We use a temporary mapping there
+ * from which we copy the kernel and ramdisk images.
+ */
+ if ( dsi.v_start < (1<<30) )
+ {
+ printk("Initial loading isn't allowed to lowest 1GB of memory.\n");
+ return -EINVAL;
+ }
+
+ /* Paranoia: scrub DOM0's memory allocation. */
+ printk("Scrubbing DOM0 RAM: ");
+ dst = (char *)alloc_start;
+ while ( dst < (char *)alloc_end )
+ {
+#define SCRUB_BYTES (100 * 1024 * 1024) /* 100MB */
+ printk(".");
+ touch_nmi_watchdog();
+ if ( ((char *)alloc_end - dst) > SCRUB_BYTES )
+ {
+ memset(dst, 0, SCRUB_BYTES);
+ dst += SCRUB_BYTES;
+ }
+ else
+ {
+ memset(dst, 0, (char *)alloc_end - dst);
+ break;
+ }
+ }
+ printk("done.\n");
+
+ /* Construct a frame-allocation list for the initial domain. */
+ for ( mfn = (alloc_start>>PAGE_SHIFT);
+ mfn < (alloc_end>>PAGE_SHIFT);
+ mfn++ )
+ {
+ page = &frame_table[mfn];
+ page_set_owner(page, p);
+ page->u.inuse.type_info = 0;
+ page->count_info = PGC_allocated | 1;
+ list_add_tail(&page->list, &p->page_list);
+ p->tot_pages++; p->max_pages++;
+ }
+
+ mpt_alloc = (vpt_start - dsi.v_start) + alloc_start;
+
+ SET_GDT_ENTRIES(ed, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(ed, DEFAULT_GDT_ADDRESS);
+
+ /*
+ * We're basically forcing default RPLs to 1, so that our "what privilege
+ * level are we returning to?" logic works.
+ */
+ ed->thread.failsafe_selector = FLAT_GUESTOS_CS;
+ ed->thread.event_selector = FLAT_GUESTOS_CS;
+ ed->thread.guestos_ss = FLAT_GUESTOS_DS;
+ for ( i = 0; i < 256; i++ )
+ ed->thread.traps[i].cs = FLAT_GUESTOS_CS;
+
+ /* WARNING: The new domain must have its 'processor' field filled in! */
+ l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+ memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
+ l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry((unsigned long)l2start | __PAGE_HYPERVISOR);
+ l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+ mk_l2_pgentry(__pa(p->mm_perdomain_pt) | __PAGE_HYPERVISOR);
+ ed->mm.pagetable = mk_pagetable((unsigned long)l2start);
+
+ l2tab += l2_table_offset(dsi.v_start);
+ mfn = alloc_start >> PAGE_SHIFT;
+ for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+ {
+ if ( !((unsigned long)l1tab & (PAGE_SIZE-1)) )
+ {
+ l1start = l1tab = (l1_pgentry_t *)mpt_alloc;
+ mpt_alloc += PAGE_SIZE;
+ *l2tab++ = mk_l2_pgentry((unsigned long)l1start | L2_PROT);
+ clear_page(l1tab);
+ if ( count == 0 )
+ l1tab += l1_table_offset(dsi.v_start);
+ }
+ *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT);
+
+ page = &frame_table[mfn];
+ if ( !get_page_and_type(page, p, PGT_writable_page) )
+ BUG();
+
+ mfn++;
+ }
+
+ /* Pages that are part of page tables must be read only. */
+ l2tab = l2start + l2_table_offset(vpt_start);
+ l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
+ l1tab += l1_table_offset(vpt_start);
+ l2tab++;
+ for ( count = 0; count < nr_pt_pages; count++ )
+ {
+ *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
+ page = &frame_table[l1_pgentry_to_pagenr(*l1tab)];
+ if ( count == 0 )
+ {
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l2_page_table;
+
+ /*
+ * No longer writable: decrement the type_count.
+ * Installed as CR3: increment both the ref_count and type_count.
+ * Net: just increment the ref_count.
+ */
+ get_page(page, p); /* an extra ref because of readable mapping */
+
+ /* Get another ref to L2 page so that it can be pinned. */
+ if ( !get_page_and_type(page, p, PGT_l2_page_table) )
+ BUG();
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ }
+ else
+ {
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-1))<<PGT_va_shift;
+
+ /*
+ * No longer writable: decrement the type_count.
+ * This is an L1 page, installed in a validated L2 page:
+ * increment both the ref_count and type_count.
+ * Net: just increment the ref_count.
+ */
+ get_page(page, p); /* an extra ref because of readable mapping */
+ }
+ l1tab++;
+ if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
+ l1start = l1tab = (l1_pgentry_t *)l2_pgentry_to_phys(*l2tab);
+ }
+
+ /* Set up shared-info area. */
+ update_dom_time(p);
+ p->shared_info->domain_time = 0;
+ /* Mask all upcalls... */
+ for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+ p->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+ p->shared_info->n_vcpu = smp_num_cpus;
+
+ /* Install the new page tables. */
+ __cli();
+ write_ptbase(&ed->mm);
+
+ /* Copy the OS image. */
+ (void)loadelfimage(image_start);
+
+ /* Copy the initial ramdisk. */
+ if ( initrd_len != 0 )
+ memcpy((void *)vinitrd_start, initrd_start, initrd_len);
+
+ /* Set up start info area. */
+ si = (start_info_t *)vstartinfo_start;
+ memset(si, 0, PAGE_SIZE);
+ si->nr_pages = p->tot_pages;
+ si->shared_info = virt_to_phys(p->shared_info);
+ si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN;
+ si->pt_base = vpt_start;
+ si->nr_pt_frames = nr_pt_pages;
+ si->mfn_list = vphysmap_start;
+
+ /* Write the phys->machine and machine->phys table entries. */
+ for ( pfn = 0; pfn < p->tot_pages; pfn++ )
+ {
+ mfn = pfn + (alloc_start>>PAGE_SHIFT);
+#ifndef NDEBUG
+#define REVERSE_START ((v_end - dsi.v_start) >> PAGE_SHIFT)
+ if ( pfn > REVERSE_START )
+ mfn = (alloc_end>>PAGE_SHIFT) - (pfn - REVERSE_START);
+#endif
+ ((unsigned long *)vphysmap_start)[pfn] = mfn;
+ machine_to_phys_mapping[mfn] = pfn;
+ }
+
+ if ( initrd_len != 0 )
+ {
+ si->mod_start = vinitrd_start;
+ si->mod_len = initrd_len;
+ printk("Initrd len 0x%lx, start at 0x%08lx\n",
+ si->mod_len, si->mod_start);
+ }
+
+ dst = si->cmd_line;
+ if ( cmdline != NULL )
+ {
+ for ( i = 0; i < 255; i++ )
+ {
+ if ( cmdline[i] == '\0' )
+ break;
+ *dst++ = cmdline[i];
+ }
+ }
+ *dst = '\0';
+
+ /* Reinstate the caller's page tables. */
+ write_ptbase(¤t->mm);
+ __sti();
+
+ /* Destroy low mappings - they were only for our convenience. */
+ for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+ if ( l2_pgentry_val(l2start[i]) & _PAGE_PSE )
+ l2start[i] = mk_l2_pgentry(0);
+ zap_low_mappings(); /* Do the same for the idle page tables. */
+
+ /* DOM0 gets access to everything. */
+ physdev_init_dom0(p);
+
+ set_bit(DF_CONSTRUCTED, &p->d_flags);
+
+ new_thread(ed, dsi.v_kernentry, vstack_end, vstartinfo_start);
+
+#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
+ shadow_lock(&p->mm);
+ shadow_mode_enable(p, SHM_test);
+ shadow_unlock(&p->mm);
+#endif
+
+ return 0;
+}
l3_pgentry_t l3e;
l2_pgentry_t l2e;
+ /*
+ * We are rather picky about the layout of 'struct pfn_info'. The
+ * count_info and domain fields must be adjacent, as we perform atomic
+ * 64-bit operations on them.
+ */
+ if ( (offsetof(struct pfn_info, u.inuse._domain) !=
+ (offsetof(struct pfn_info, count_info) + sizeof(u32))) )
+ {
+ printk("Weird pfn_info layout (%ld,%ld,%d)\n",
+ offsetof(struct pfn_info, count_info),
+ offsetof(struct pfn_info, u.inuse._domain),
+ sizeof(struct pfn_info));
+ for ( ; ; ) ;
+ }
+
/* M2P table is mappable read-only by privileged domains. */
for ( v = RDWR_MPT_VIRT_START;
v != RDWR_MPT_VIRT_END;
for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
{
- frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
+ frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
/* gdt to make sure it's only mapped read-only by non-privileged
domains. */
frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
- frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen;
+ page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
}
}
}
for ( i = 0; i < (1 << order); i++ )
{
pg[i].count_info = 0;
- pg[i].u.inuse.domain = NULL;
+ pg[i].u.inuse._domain = 0;
pg[i].u.inuse.type_info = 0;
}
}
pg[i].count_info = 0;
- pg[i].u.inuse.domain = NULL;
+ pg[i].u.inuse._domain = 0;
pg[i].u.inuse.type_info = 0;
}
for ( i = 0; i < (1 << order); i++ )
{
- pg[i].u.inuse.domain = d;
+ page_set_owner(&pg[i], d);
wmb(); /* Domain pointer must be visible before updating refcnt. */
pg[i].count_info |= PGC_allocated | 1;
list_add_tail(&pg[i].list, &d->page_list);
void free_domheap_pages(struct pfn_info *pg, unsigned int order)
{
int i, drop_dom_ref;
- struct domain *d = pg->u.inuse.domain;
+ struct domain *d = page_get_owner(pg);
struct exec_domain *ed;
void *p;
int cpu_mask = 0;
/* Each frame can be threaded onto a doubly-linked list. */
struct list_head list;
+ /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
+ u32 tlbflush_timestamp;
+
/* Reference count and various PGC_xxx flags and fields. */
u32 count_info;
/* Page is in use: ((count_info & PGC_count_mask) != 0). */
struct {
/* Owner of this page (NULL if page is anonymous). */
- struct domain *domain;
+ u32 _domain; /* pickled format */
/* Type reference count and various PGT_xxx flags and fields. */
u32 type_info;
- } inuse;
+ } PACKED inuse;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
/* Mask of possibly-tainted TLBs. */
- unsigned long cpu_mask;
+ u32 cpu_mask;
/* Order-size of the free chunk this page is the head of. */
u8 order;
- } free;
+ } PACKED free;
- } u;
+ } PACKED u;
- /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
- u32 tlbflush_timestamp;
-};
+} PACKED;
/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none (0<<29) /* no special uses of this page */
#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)
+#if defined(__i386__)
+
+#define pickle_domptr(_d) ((u32)(unsigned long)(_d))
+#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
+
+#elif defined(__x86_64__)
+static inline struct domain *unpickle_domptr(u32 _domain)
+{ return (_domain == 0) ? NULL : __va(_domain); }
+static inline u32 pickle_domptr(struct domain *domain)
+{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
+
+#endif
+
+#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
+#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+
#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom) \
do { \
- (_pfn)->u.inuse.domain = (_dom); \
+ page_set_owner((_pfn), (_dom)); \
/* The incremented type count is intended to pin to 'writable'. */ \
(_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \
wmb(); /* install valid domain ptr before updating refcnt. */ \
struct domain *domain)
{
u32 x, nx, y = page->count_info;
- struct domain *d, *nd = page->u.inuse.domain;
+ u32 d, nd = page->u.inuse._domain;
+ u32 _domain = pickle_domptr(domain);
do {
x = y;
d = nd;
if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
- unlikely(d != domain) ) /* Wrong owner? */
+ unlikely(d != _domain) ) /* Wrong owner? */
{
DPRINTK("Error pfn %08lx: ed=%p, sd=%p, caf=%08x, taf=%08x\n",
- page_to_pfn(page), domain, d,
+ page_to_pfn(page), domain, unpickle_domptr(d),
x, page->u.inuse.type_info);
return 0;
}
ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d) \
ASSERT(((_p)->count_info & PGC_count_mask) != 0); \
- ASSERT((_p)->u.inuse.domain == (_d))
+ ASSERT(page_get_owner(_p) == (_d))
int check_descriptor(unsigned long *d);
SH_LOG("mark_dirty OOR! mfn=%x pfn=%lx max=%x (mm %p)",
mfn, pfn, m->shadow_dirty_bitmap_size, m );
SH_LOG("dom=%p caf=%08x taf=%08x\n",
- frame_table[mfn].u.inuse.domain,
+ page_get_owner(&frame_table[mfn]),
frame_table[mfn].count_info,
frame_table[mfn].u.inuse.type_info );
}